* Start from a clean session: drop data, matrices and Mata objects,
* and close a log if one is open (capture suppresses the error if none is).
clear
clear matrix
clear mata
capture log close
* Turn off the --more-- pause so the do-file runs without user input
* (documented syntax; replaces the legacy numeric form "set more 1").
set more off
* Raise the maximum number of variables allowed in memory
set maxvar 10000


***********************************************************************************************************
** Setup files: Huber, Schelker, Strittmatter, “Direct and Indirect Effects based on Changes-in-Changes” **
***********************************************************************************************************


* Root data directory used as the prefix of every file path below.
* NOTE(review): "..." looks like a placeholder to be replaced with the local path;
* the trailing backslash is needed because later paths append "SAKE\..." directly.
global data "...\"


****************************************************
**** Application II: Paid Maternity Leave Reform ****
****************************************************


** Import dataset **
* Convert each semicolon-delimited yearly CSV (header in row 1) into a .dta file,
* tagging every row with its survey year as the first variable.
foreach v of numlist 2004 2006 2007 {
	import delimited using "${data}SAKE\SAKE`v'.csv", delimiters(";") varnames(1) clear
	generate year = `v'
	order year, first
	save "${data}SAKE\SAKE`v'.dta", replace
}

* Stack the three yearly files: 2004 as the base, then 2006 and 2007
use "${data}SAKE\SAKE2004.dta", clear
append using "${data}SAKE\SAKE2006.dta" "${data}SAKE\SAKE2007.dta"

** Original variable labels **
* Labels are kept in the original German wording of the survey.
label variable b0000 "(UV) Arbeitsmarktstatus"
label variable b011d "Interview datum"
label variable b015 "Haushaltsnummer"
label variable b017 "Kanton"
label variable b019 "Gemeindenummer"
label variable b022 "Jahr"
label variable bb02 "Haushaltsgrsse"
label variable bb03a "Alter Person"
label variable bwu1 "(UV) Jhrl.Erwerbseink.ZP,brutto"
label variable bwu2 "(UV) Jhrl.Erwerbseink.ZP,netto"
label variable ek201 "Erwerbstätige: HTK/TK-Mutter'urlaub(Wo)le.J.(Apr-Dez)"
label variable ek202 "Erwerbstätige: HTK/TK-Mutter'urlaub(Wo)dies.J.(Jan-Mr)"
label variable ak201 "Arbeitslose: HTK/TK-Mutter'urlaub(Wo)le.J.(Apr-Dez)"
label variable ak202 "Arbeitslose: HTK/TK-Mutter'urlaub(Wo)dies.J.(Jan-Mr)"
label variable nk201 "Nicht-Erwerbstätige: HTK/TK-Mutter'urlaub(Wo)le.J.(Apr-Dez)"
label variable nk202 "Nicht-Erwerbstätige: HTK/TK-Mutter'urlaub(Wo)dies.J.(Jan-Mr)"
label variable is01 "Geschlecht"
label variable is02 "Geburtsjahr"
label variable is03 "Zivilstand"
label variable sex "GESCHLECHT"

* Interview date: b011d is turned into a string, left-padded with "0" when it has
* fewer than 6 characters, and parsed with the mask DM20Y (day, month, 2-digit year, 20xx).
* NOTE(review): assumes b011d is a numeric DDMMYY value -- confirm against the raw CSV.
tostring b011d, generate(datehelp)
generate strlength = ustrlen(datehelp)
replace datehelp = "0" + datehelp if strlength < 6
generate double interview = date(datehelp, "DM20Y")
format interview %td

* Put the identifying variables first
order b015 b017 b022 b011d interview


** Subsample between 20-59 years **
* Keep ages in [20, 60); rows with a missing age fail "bb03a < 60" and are dropped too.
keep if bb03a >= 20 & bb03a < 60


** Create variables **
* b015 is used as the panel identifier from here on.
* NOTE(review): b015 is labelled "Haushaltsnummer" (household number) -- confirm it
* identifies one respondent per year.
rename b015 PID

* Group variables
gen age = bb03a

* female: 1 if sex == 2, 0 if sex == 1, missing for any other code
gen female = 0 if sex == 1
replace female = 1 if sex == 2

* Treatment (paid maternity leave) was introduced on July 1st, 2005:
* 2004 is the pre-treatment wave, 2006 and later are post-treatment.
gen posttreatment = 0 if year == 2004
replace posttreatment = 1 if year>=2006

* High-fertility women are aged 20-39, low-fertility women are older than 45.
* Men and women in between are left missing.
* NOTE(review): the original comment says "45+", but the condition excludes age 45
* exactly -- confirm the intended cut-off.
gen highfertility = 0 if female == 1 & age>45
replace highfertility = 1 if female == 1 & (age>=20 & age<40)

* Motherhood in year t: any positive number of maternity-leave weeks (paid or unpaid)
* in the employed (ek), unemployed (ak) or non-employed (nk) items.
* Stata treats missing as larger than any number, so "x > 0" alone would flag rows
* where an item is missing; the !missing() guard prevents that.
gen maternity = 0
foreach wk of varlist ek201 ek202 ak201 ak202 nk201 nk202 {
	replace maternity = 1 if `wk' > 0 & !missing(`wk')
}
tab maternity year

* Outcomes: yearly gross (bwu1) and net (bwu2) income.
* Negative values are non-response codes: "don't know (-7)", "no answer (-8)",
* "didn't ask (-9)"; they are set to missing.
gen grossincome = bwu1
replace grossincome = . if bwu1 < 0
gen netincome = bwu2
* Screen net income on its own codes (previously tested bwu1 by mistake)
replace netincome = . if bwu2 < 0

* Remove every 2004 row that is flagged as being on maternity leave
drop if year == 2004 & maternity == 1

* Construct balanced panel: keep only PIDs observed in the pre- and post-treatment periods.
* The report shows whether PID-year pairs are unique; it does not change the data.
duplicates report PID year
* nyear = number of remaining rows per PID; exactly 3 rows are required
* (one per wave 2004/2006/2007 when PID-year is unique).
bysort PID: generate nyear = _N
drop if nyear != 3

* Mediator: 1 for every row of a PID that has at least one row with maternity == 1, else 0
bysort PID: egen mediatorhelp = total(maternity)
generate mediator = (mediatorhelp > 0)
drop mediatorhelp

* Variable labels for the constructed variables
label variable age "age of respondent"
label variable female "female (1), male (0)"
label variable highfertility "high-fertility females: females 20-40 years"
label variable posttreatment "post-treatment dummy: materinity leave for year>2005"
label variable maternity "maternity leave in year t"
label variable mediator "1: for all those with materinty episode, else 0"
label variable grossincome "gross income per year"
label variable netincome "net income per year"

* Keep only the analysis variables and store the final panel
keep PID year interview age female highfertility posttreatment maternity mediator grossincome netincome
save "${data}SAKE\SAKE_2004_2006-2007.dta", replace


** Summary statistics **
* Observations, mean, standard deviation, min and max of the analysis variables
summarize grossincome netincome female highfertility maternity mediator age



